This code implements all statistical analyses and creates figures for “Longitudinal development of brain iron is linked to cognition in youth” (Larsen, et al., 2019).


Relevant functions:

GAMM modeling

# This function:
# 1. executes the GAMM model,
# 2. sends output to the parametric bootstrap (if requested),
# 3. prints a regression table, and 
# 4. sends the model to the visualizer for plotting.

gamm_model <- function(df, model_formula, this_label, smooth_var, int_var = NULL,group_var, pbootstrap = F, longPlot = F, model_test = T){
  cat(sprintf("\n\n### Results for %s\n",this_label))
  
 model_formula <- as.formula(model_formula)
  if(!"exclusions" %in% colnames(df))  {
    df$exclusions <- FALSE; #there is no exclusions column so make one that excludes none
  }
  g1<-gamm(model_formula,
           data=df,
           random = list(bblid =~ 1),
           subset = exclusions == F)
  
  if (model_test == T){
    if (pbootstrap == T) {
      #Send to bootstrap function
      g1$pb<-pboot(g1) 
      #Print a table that shows the bootstrap outcome
      print(g1$pb %>%
        summary() %>%
        .$test %>%
        as.data.frame() %>%
        kable(caption = sprintf("Parametric Bootstrap test for %s",this_label)) %>%
        kable_styling(full_width = F, position = "left",bootstrap_options = c("striped"))
      )
      
      if (isTRUE(all.equal(g1$pb$bestmod,model_formula))) {
        cat("The initial (more complicated) model is best")
        g <- g1
        
        # Refit the model without fixed df for the purpose of plotting
        plot_formula <- as.formula(gsub(", fx = T","",deparse(model_formula)))
        cat(deparse(plot_formula))
        plotg<-g
        plotg <- gamm(plot_formula,
                      data = df,
                      random = list(bblid =~1),
                      subset = exclusions == F)
      } else {
        cat("The simpler model is best")
        cat("   refitting  ")
        g <-gamm(as.formula(g1$pb$bestmod),
                 data=df,
                 random = list(bblid =~ 1),
                 subset = exclusions == F)
        
        #Again, this part not implemented (model not refit with unfixed df)
        plot_formula <- as.formula(gsub("ti\\(",'te\\(',deparse(g$gam$formula)) %>% gsub(", fx = T", "", .))
        plotg<-g
        plotg <-gamm(plot_formula,
                 data=df,
                 random = list(bblid =~ 1),
                 subset = exclusions == F)
      }
    } else {
      if (!is.null(int_var)) {
        # We are not bootstrapping, but there is an interaction variable
        s<-summary(g1$gam)
        if (s$s.table[grep(x=rownames(s$s.table),pattern = int_var),"p-value"] <.05/4)  {
          #Checked if interaction is sig, if so keep in the model
          g <- g1
          plot_formula <- as.formula(gsub(", fx = T", "", deparse(model_formula)))
          plotg <- g
        } else {
          #Interaction is not sig, remove from the model
          cat("The simpler model is best")
          thisResp <- as.character(g1$gam$terms[[2]])
          theseVars <- attr(terms(model_formula),"term.labels")
          new_formula <- reformulate(theseVars[0:(length(theseVars)-1)],response = thisResp)
          
          g <-gamm(as.formula(new_formula),
                   data=df,
                   random = list(bblid =~ 1),
                   subset = exclusions == F)
          plot_formula <- as.formula(gsub("ti\\(",'te\\(',deparse(g$gam$formula)) %>% gsub(", fx = T", "", .))
          plotg<-gamm(as.formula(plot_formula),
                   data=df,
                   random = list(bblid =~ 1),
                   subset = exclusions == F)
      }
      } else {
        #There is no interaction term, just plot.
        g <- g1
          plot_formula <- as.formula(gsub(", fx = T", "", deparse(model_formula)))
          plotg <-gamm(as.formula(plot_formula),
                   data=df,
                   random = list(bblid =~ 1),
                   subset = exclusions == F)
      }
    }
  } else {
    g <- g1
    plotg<-g
  }

  g$gam$data<-df %>%
    filter(exclusions == F)
  
  #Display model results:
  s_tidytable<- tidy(g$gam)
  p_tidytable <- tidy(g$gam,parametric = T)
  snames = names(s_tidytable)
  pnames = names(p_tidytable)
  names(s_tidytable)=pnames
  thisBIC <- BIC(g$lme)
  numObs <- g$lme$dims$N
  g$BIC <- thisBIC
  print(concurvity(g$gam)%>%kable(caption = "convurvity")%>%kable_styling(full_width = F,bootstrap_options = "striped",position = "left"))
  stattable <- rbind(p_tidytable,snames,s_tidytable) %>%
    kable(caption = sprintf("Regression table from gamm in %s, BIC = %1.2f, obs = %d",this_label,thisBIC,numObs)) %>% 
    kable_styling(full_width = F, position = "left")
  
  print(stattable)
  write.csv(x = rbind(p_tidytable,snames,s_tidytable),file = sprintf("GAMM_table_%s.csv",this_label),row.names = F)
  cat(sprintf("Regression table from gamm in %s, BIC = %1.2f, obs = %d\n",this_label,thisBIC,numObs),
      file = sprintf("GAMM_table_%s.txt",this_label))
  cat(sprintf("Bootstrap p value %1.5f",g1$pb$test["PBtest","p.value"]),
      file = sprintf("GAMM_table_%s.txt",this_label),
      append = T)


  #Send final model to visualizer:
  if (longPlot == T) {
      g$pl <- longitudinal_plot(g,plabels = this_label)
    } else{
    if (s_tidytable$p.value[nrow(s_tidytable)]<1.05) {
        g$pl <- visualize_models(plotg,plabels = this_label, smooth_var = smooth_var, int_var = int_var, group_var = group_var)
    }
    }
  #Return result object
  result <- g
  
  return(result)
}

Bootstrap procedure

## Parametric bootstrap of likelihood ratio test for nested models
pboot <- function(modelobj){
  numsims <- 1000

  df <- modelobj$gam$model
  thisResp <- as.character(modelobj$gam$terms[[2]])
  f1 <- modelobj$gam$formula
  theseVars <- attr(terms(f1),"term.labels")
  f2 <- reformulate(theseVars[0:(length(theseVars)-1)],response = thisResp)
  
  g1 <- gam(f1,data = df)
  g2 <- gam(f2,data = df)

  mat1 <- model.matrix(g1)
  mat2 <- model.matrix(g2)

  bblid<-df$bblid
  y <- df[,thisResp]
  
  m1 <- lmer(y ~ -1 + mat1 + (1|bblid))
  m2 <- lmer(y ~ -1 + mat2 + (1|bblid))
  refdist <- PBrefdist(m1, m2, nsim=numsims)#, cl=cl)
  pb <- PBmodcomp(m1, m2, ref = refdist)
  int_pval <- pb$test["PBtest","p.value"]
  if (int_pval < .05/4) {
    pb$bestmod <- f1
  } else {
    pb$bestmod <- f2
  }
  return(pb)
}

Visualize models

Load the data

# Load the datafile
dataFile <- "brain_iron_data.Rda"
dataTable <- readRDS(file=dataFile)

## Create the final sample
### Remove
### 1. Projects with incompatible data
dataTable <- dataTable %>%
  filter(bblid != 110689)%>%
  filter(!(ProjectName %in% c("DAY2_808799",
                              "FNDM1_810211",
                              "FNDM2_810211",
                              "NEFF_V2",
                              "NEFF_PILOT"
                              )
           )
         ) #Not using data from these projects or participants with no scans.
### 2. Sequences with odd parameters
dataTable <- dataTable %>%
  filter(sequence != 'B0map_v4_matchedFOV' & sequence != "B0map") %>% # Different voxel sizes
  filter(sequence != 'B0map_onesizefitsall_v3_T2S') %>% # small number of scans with odd params
  filter(!is.na(GOOD)&!is.na(Putamen)) # No r2* data available or no QC possible
#Print some info
summaryTable<-dataTable%>%group_by(bblid,sex)%>%summarize(n=n())
cat(sprintf('Initial sample includes:\n %d individuals aged %1.2f - %1.2f (M = %1.3f, SD = %.3f)\n (M/F = %d/%d)\n%d observations', 
        length(unique(dataTable$bblid)),
        min(dataTable$age,na.rm=T),
        max(dataTable$age[dataTable$visit==1],na.rm=T),
        mean(dataTable$age[dataTable$visit==1],na.rm=T),
        sd(dataTable$age[dataTable$visit==1],na.rm=T),
        sum(summaryTable$sex=='male'),
        sum(summaryTable$sex=='female'),
        length(dataTable$bblid)))
## Initial sample includes:
##  1543 individuals aged 8.17 - 26.92 (M = 15.237, SD = 3.738)
##  (M/F = 728/815)
## 2321 observations
### 3. Subjects that don't meet inclusion
cat(sprintf('\nHealth exclusions: %d subs, %d scans',
            length(unique(dataTable$bblid[dataTable$ltnExcludev2==1])),
            sum(dataTable$ltnExcludev2==1,na.rm = T)))
## 
## Health exclusions: 309 subs, 419 scans
dataTable <- dataTable %>% 
    filter(ltnExcludev2 ==0 | is.na(ltnExcludev2)) # exclude unhealthy/medicated subs
### 4. Scans that don't pass QA
cat(sprintf('\nQC exclusions: %d scans fail (%d pass)',
            sum(dataTable$GOOD==0,na.rm = T),
            sum(dataTable$GOOD %in% c("1","2"),na.rm = T)))
## 
## QC exclusions: 641 scans fail (1236 pass)
dataTable <- dataTable %>%
  filter(GOOD %in% c("1","2")) # keep only subjects that pass QA

## Count visits  
dataTable <- dataTable %>% 
  group_by(bblid) %>% 
  mutate(visitnum = min_rank(ScanAgeMonths)) %>%
  ungroup()
  dataTable$visitnum <- ordered(dataTable$visitnum)
summaryTable<-dataTable%>%group_by(bblid,sex)%>%summarize(n=n())
cat(sprintf('\nFinal sample includes:\n %d individuals aged %1.2f - %1.2f (M = %1.3f, SD = %.3f)\n (M/F = %d/%d)\n %d observations', 
        length(unique(summaryTable$bblid)),
        min(dataTable$age,na.rm=T),
        max(dataTable$age[dataTable$visit==1],na.rm=T),
        mean(dataTable$age[dataTable$visit==1],na.rm=T),
        sd(dataTable$age[dataTable$visit==1],na.rm=T),
        sum(summaryTable$sex=='male'),
        sum(summaryTable$sex=='female'),
        length(dataTable$bblid)))
## 
## Final sample includes:
##  922 individuals aged 8.17 - 26.92 (M = 15.146, SD = 3.724)
##  (M/F = 436/486)
##  1236 observations
# Create sample description figure (Fig 1)
sortedBBLID<-dataTable %>% group_by(bblid) %>% summarise(m=min(age)) %>% arrange(m)
sortedBBLID$row<-as.numeric(rownames(sortedBBLID))
newDataTable<-dataTable%>%left_join(sortedBBLID%>%select(bblid,row),by="bblid")
fig1<-ggplot(data = newDataTable,aes(x=reorder(row,age,FUN = min),y=age,color=oSex)) + 
  geom_point(size=.5,alpha=.5) + geom_line(alpha=.5) +
  scale_x_discrete(breaks = c(1,500,1000,length(sortedBBLID$bblid))) +
  coord_flip(clip = "off") + 
  scale_color_manual(values = c("blue","red"),labels=c("Male","Female"))+ 
  labs(y = "Age (years)",x="Participant",color="") +theme(legend.position = c(.8,.2))
print(fig1)

ggsave("Fig1.svg",device = "svg",plot = fig1,width = 85,height = 85,units = "mm",bg = "transparent")
## make data frame for participants that have cognition data.
behTable <- dataTable %>% 
  filter(!is.na(NAR_Overall_Efficiency)) %>%
  filter(scan2cnbmonths<=6)
summaryTable<-behTable%>%group_by(bblid,sex)%>%summarize(n=n())
cat(sprintf('\nBehavior sample includes:\n %d individuals aged %1.2f - %1.2f (M = %1.3f, SD = %.3f)\n (M/F = %d/%d)\n %d observations', 
        length(unique(summaryTable$bblid)),
        min(behTable$age,na.rm=T),
        max(behTable$age[behTable$visit==1],na.rm=T),
        mean(behTable$age[behTable$visit==1],na.rm=T),
        sd(behTable$age[behTable$visit==1],na.rm=T),
        sum(summaryTable$sex=='male'),
        sum(summaryTable$sex=='female'),
        length(behTable$bblid)))
## 
## Behavior sample includes:
##  818 individuals aged 8.17 - 26.92 (M = 14.840, SD = 3.574)
##  (M/F = 389/429)
##  1067 observations

Combat harmonization

## Combat harmonization
# First harmonize for age models
comtable <- dataTable %>%
  select(Caudate,Putamen,Accumbens_Area,Pallidum,sequence,age,oSex,bblid,scanid,visitnum) %>%
  drop_na()
batch <- as.character(comtable$sequence)
ctab <- t(data.matrix(comtable%>%select(Caudate,Putamen,Accumbens_Area,Pallidum)))
g<-gamm(Putamen ~ visitnum + s(age, k = 4, fx = T) + oSex + s(age, by = oSex, k = 4, fx = T) ,data=comtable,random=list(bblid=~1))
mod<- model.matrix(g$gam)
combatdata <- combat(ctab,batch,mod=mod, eb = F)
## [combat] Performing ComBat without empirical Bayes (L/S model)
## [combat] Found 4 batches
## [combat] Adjusting for 9 covariate(s) or covariate level(s)
## [combat] Standardizing Data across features
## [combat] Fitting L/S model
## [combat] Adjusting the Data
harmonized_data<-data.frame(t(combatdata$dat.combat))
colnames(harmonized_data)<- paste(colnames(harmonized_data),"h",sep = "_")
harmonized_data$bblid=comtable$bblid
harmonized_data$scanid=comtable$scanid
dataTable<- dataTable %>%
  left_join(harmonized_data,by = c("bblid","scanid"))

# Now for Behavior models
comtable <- behTable %>%
  select(Caudate,Putamen,Accumbens_Area,Pallidum,sequence,age,NAR_Overall_Efficiency,oSex,bblid,scanid) %>%
  drop_na()
batch <- as.character(comtable$sequence)
ctab <- t(data.matrix(comtable%>%select(Caudate,Putamen,Accumbens_Area,Pallidum)))
g<-gamm(Putamen ~ s(age, k = 4, fx = T) + s(age, by = NAR_Overall_Efficiency, k = 4, fx = T) + oSex,data=comtable,random=list(bblid=~1))
mod<- model.matrix(g$gam)
combatdata <- combat(ctab,batch,mod=mod, eb = F)
## [combat] Performing ComBat without empirical Bayes (L/S model)
## [combat] Found 3 batches
## [combat] Adjusting for 8 covariate(s) or covariate level(s)
## [combat] Standardizing Data across features
## [combat] Fitting L/S model
## [combat] Adjusting the Data
harmonized_data<-data.frame(t(combatdata$dat.combat))
colnames(harmonized_data)<- paste(colnames(harmonized_data),"h",sep = "_")
harmonized_data$bblid=comtable$bblid
harmonized_data$scanid=comtable$scanid
behTable<- behTable %>%
  left_join(harmonized_data,by = c("bblid","scanid"))

longTable <- dataTable %>%
    gather(key = "ROI",value="R2s",Accumbens_Area_h,Putamen_h,Caudate_h,Pallidum_h)

rois <- c("Caudate_h","Putamen_h","Accumbens_Area_h","Pallidum_h")

ROI comparison

Analyses for figure 3

source("/Users/larsenb/Documents/Tools/RainCloudPlots-master/tutorial_R/R_rainclouds.R") #requires RainCloud package: https://github.com/RainCloudPlots/RainCloudPlots/blob/master/tutorial_R/R_rainclouds.R

longTable$ROI<-factor(longTable$ROI,levels = c("Caudate_h","Putamen_h","Accumbens_Area_h","Pallidum_h"),labels = c("Caudate","Putamen","Nucleus Accumbens","Pallidum"))
longTable$oldROI <- c("Caudate","Putamen","Accumbens_Area","Pallidum")

l<-lmerTest::lmer(R2s ~ sex+age+ROI+(1|bblid),data = longTable)
m<-psycho::get_means(fit = l,formula="ROI")
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, set emm_options(pbkrtest.limit = 4944) or larger,
## but be warned that this may result in large computation time and memory use.
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, set emm_options(lmerTest.limit = 4944) or larger,
## but be warned that this may result in large computation time and memory use.
emmeans_table<- kable(m) %>% 
  kable_styling(bootstrap_options = "striped",position = "left")
print(emmeans_table)
ROI Mean SE df CI_lower CI_higher
Caudate 15.17269 0.0552106 Inf 15.06448 15.28090
Putamen 16.95906 0.0552106 Inf 16.85085 17.06727
Nucleus Accumbens 17.58602 0.0552106 Inf 17.47781 17.69423
Pallidum 22.33203 0.0552106 Inf 22.22382 22.44024
c <- psycho::get_contrasts(l,"ROI",adjust="bonf")
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, set emm_options(pbkrtest.limit = 4944) or larger,
## but be warned that this may result in large computation time and memory use.
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, set emm_options(lmerTest.limit = 4944) or larger,
## but be warned that this may result in large computation time and memory use.
contrast_table<- kable(c) %>% 
  kable_styling(bootstrap_options = "striped",position = "left")
print(contrast_table)
Contrast Difference SE df z p CI_lower CI_higher
Caudate - Putamen -1.7863742 0.0676308 Inf -26.413634 0 -1.9648016 -1.607947
Caudate - Nucleus Accumbens -2.4133326 0.0676308 Inf -35.683948 0 -2.5917600 -2.234905
Caudate - Pallidum -7.1593439 0.0676308 Inf -105.859280 0 -7.3377713 -6.980917
Putamen - Nucleus Accumbens -0.6269584 0.0676308 Inf -9.270314 0 -0.8053857 -0.448531
Putamen - Pallidum -5.3729697 0.0676308 Inf -79.445646 0 -5.5513970 -5.194542
Nucleus Accumbens - Pallidum -4.7460113 0.0676308 Inf -70.175332 0 -4.9244386 -4.567584
write.csv(c,'EMMeans_contrast.csv',row.names = F)

fig3<-ggplot(data=longTable,aes(x = reorder(ROI,-R2s,FUN = mean,na.rm=T),y = R2s,fill=ROI,color = ROI)) + 
  geom_flat_violin(show.legend = F,position=position_nudge(x=.2),alpha = .4) + 
  labs(x=NULL,y = "R2* (1/sec)",font_size=font_size) +
  geom_point(size=.25,alpha=.4,position = position_jitter(width = .1),show.legend = F) +
  geom_boxplot(color = "black",fill=NA,outlier.shape = NA,width=.1,show.legend = F) +
  scale_color_brewer(palette = "Set1") + scale_fill_brewer(palette = "Set1") +
  scale_x_discrete(labels=c("Globus\nPallidus","Nucleus\nAccumbens","Putamen","Caudate"))
fig3

ggsave(plot=fig3,"Fig3.svg",device = "svg",width = 85,height = 85,units = "mm",bg = "transparent")

Developmental effects

Analyses for figure 4.
Fit nonlinear age effect and check for age*sex interaction (factor smooth interaction: s(age, by = oSex)) oSex is Sex specified as an ordered factor to appropriately test for the interaction (female against male reference)

model_formula <- "R2s ~ visitnum +oSex + oRace_bwo +s(age, k = 4, fx = T) + s(age, by = oSex, k = 4, fx = T) "
## This will perform model testing. If the age * sex interaction is significant, it is retained, otherwise it will return an age + sex model.
models <- longTable %>%
    group_by(ROI)%>%
    nest()%>%
    mutate(results=purrr::pmap(list(data,model_formula,this_label = ROI, pbootstrap = T, longPlot = T),.f=gamm_model))

Results for Nucleus Accumbens

Parametric Bootstrap test for Nucleus Accumbens
stat df ddf p.value
PBtest 1.9757334 NA NA 0.5664336
Gamma 1.9757334 NA NA 0.5587682
Bartlett 2.0591528 3 NA 0.5602205
F 0.6585778 3 3.064699 0.6292186
LRT 1.9757334 3 NA 0.5774586
The simpler model is best refitting
convurvity
para s(age)
worst 0.7753364 0.1211547
observed 0.7753364 0.0825105
estimate 0.7753364 0.1032679
Regression table from gamm in Nucleus Accumbens, BIC = 5844.61, obs = 1235
term estimate std.error statistic p.value
(Intercept) 17.4861600325314 0.149943218021533 116.618545761905 0
visitnum.L -0.0239854377469602 0.231762288339197 -0.103491546958909 0.917589783885288
visitnum.Q 0.0510209690673487 0.15345724144422 0.33247677716072 0.739586109756318
oSex.L 0.288357604672805 0.118627761102667 2.43077675910316 0.0152091226580745
oRace_bwo.L -0.412873371047967 0.197447871953272 -2.09104999189698 0.0367290336123828
oRace_bwo.Q -0.00173326477297276 0.153024269604343 -0.011326731226715 0.990964612066546
term edf ref.df statistic p.value
s(age) 3 3 41.5735818338385 9.49021159068033e-26
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.

Sig change: 8.17 - 17.12

Results for Putamen

Parametric Bootstrap test for Putamen
stat df ddf p.value
PBtest 8.96187 NA NA 0.0299700
Gamma 8.96187 NA NA 0.0305345
Bartlett 8.96842 3 NA 0.0297137
F 2.98729 3 3.001097 0.1963283
LRT 8.96187 3 NA 0.0298022
The simpler model is best refitting
convurvity
para s(age)
worst 0.7753364 0.1211547
observed 0.7753364 0.1106914
estimate 0.7753364 0.1032679
Regression table from gamm in Putamen, BIC = 3944.17, obs = 1235
term estimate std.error statistic p.value
(Intercept) 17.0837970708726 0.0705330905798225 242.209676769216 0
visitnum.L 0.243981166893351 0.11501228096822 2.12134882326843 0.0340926608837701
visitnum.Q 0.0247822592470953 0.0775572597245464 0.319535003365415 0.749375291685797
oSex.L -0.0293967888188757 0.0529130616975227 -0.555567715716817 0.578607767500187
oRace_bwo.L -0.0980967255048573 0.0881472318610424 -1.11287358018797 0.265980881783258
oRace_bwo.Q 0.155103316728515 0.068270910066593 2.27188002294424 0.023266395689981
term edf ref.df statistic p.value
s(age) 2.99999999999999 2.99999999999999 86.1106136043234 3.18238513963805e-52
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.

Sig change: 10.05 - 26.92

Results for Caudate

Parametric Bootstrap test for Caudate
stat df ddf p.value
PBtest 0.8792533 NA NA 0.8231768
Gamma 0.8792533 NA NA 0.8341081
Bartlett 0.8676429 3 NA 0.8332285
F 0.2930844 3 2.980323 0.8298361
LRT 0.8792533 3 NA 0.8304314
The simpler model is best refitting
convurvity
para s(age)
worst 0.7753364 0.1211547
observed 0.7753364 0.0674739
estimate 0.7753364 0.1032679
Regression table from gamm in Caudate, BIC = 4271.13, obs = 1235
term estimate std.error statistic p.value
(Intercept) 15.269762421595 0.0776776347370658 196.578622318796 0
visitnum.L 0.176402467706872 0.110934580845815 1.59014859353954 0.112059127248107
visitnum.Q -0.00577971594470104 0.0716797635000482 -0.0806324639268259 0.9357474078279
oSex.L 0.146585890971771 0.0658920384329252 2.22463736830646 0.0262870751028674
oRace_bwo.L -0.0586540660610204 0.10959347172685 -0.535196715067203 0.592610896545729
oRace_bwo.Q 0.230240038824217 0.0849748445558743 2.70950820831246 0.00683221278725056
term edf ref.df statistic p.value
s(age) 3 3 9.95646441276744 1.73004055047032e-06
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.

Sig change: 8.17 - 15.89

Results for Pallidum

Parametric Bootstrap test for Pallidum
stat df ddf p.value
PBtest 13.395682 NA NA 0.0019980
Gamma 13.395682 NA NA 0.0021317
Bartlett 13.224946 3 NA 0.0041746
F 4.465227 3 2.981003 0.1261747
LRT 13.395682 3 NA 0.0038546
The initial (more complicated) model is bestR2s ~ visitnum + oSex + oRace_bwo + s(age, k = 4) + s(age, by = oSex, k = 4)
convurvity
para s(age) s(age):oSexfemale
worst 0.7767801 0.6767412 0.6713269
observed 0.7767801 0.5599106 0.5935324
estimate 0.7767801 0.5484832 0.5320300
Regression table from gamm in Pallidum, BIC = 4753.09, obs = 1235
term estimate std.error statistic p.value
(Intercept) 22.4420835917782 0.096170646920529 233.356895377061 0
visitnum.L 0.16947438464697 0.149994146382322 1.12987332328953 0.258751152755715
visitnum.Q -0.041772753271621 0.0997112395016137 -0.418937258030425 0.6753355508665
oSex.L -0.0883740132128409 0.0752483876130313 -1.17443065580765 0.240451197393393
oRace_bwo.L -0.00948850745249523 0.12515802852913 -0.0758122156764943 0.939580896461433
oRace_bwo.Q 0.0685702907730544 0.0971245985102329 0.706003338235986 0.480320522722184
term edf ref.df statistic p.value
s(age) 2.99999999999999 2.99999999999999 84.6072181107431 2.50042565031731e-51
s(age):oSexfemale 3.00000000000001 3 4.46988263857325 0.00395226040160837
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.

Sig change: 8.17 - 25.92

Sig change: 8.17 - 22.21

Modeling of continuous*continuous interactions tests two possible models.
Analyses for figures 5 and 6.

We compare an additive (main effects + interaction) bivariate smooth model and a varying coefficient model (details below). The best model is chosen via the smallest BIC. After the best interaction model is selected, the significance of the interaction is tested with a parametric bootstrap likelihood ratio test. This test compares the model with the interaction term against a simpler nested model with main effects only. If the interaction model is significantly better, we keep that model. If not, the final model is the simpler model with no interaction.

2D smooth with additional main effects and tensor product smooth, ti: ti(age) + ti(Cognition) + ti(age,Cognition)

From documentation: This model specifies a main effects + interaction structure such as: y ~ ti(x) + ti(z) + ti(x,z)

ti is the proper way of specifying an interaction term in the context of included main effect terms:

“This functional ANOVA decomposition is supported by ti terms, which produce tensor product interactions from which the main effects have been excluded, under the assumption that they will be included separately. For example the ~ ti(x) + ti(z) + ti(x,z) would produce the above main effects + interaction structure. This is much better than attempting the same thing with s or te terms representing the interactions (although mgcv does not forbid it). Technically ti terms are very simple: they simply construct tensor product bases from marginal smooths to which identifiability constraints (usually sum-to-zero) have already been applied: correct nesting is then automatic (as with all interactions in a GLM framework). See Wood (2017, section 5.6.3).”

Varying coefficient model (using by =)

This will make the fit linear (rather than non-linear smooth) in the by variable (which is cognition in this case). From documentation: “When using by with a numberic covariate,”the by argument ensures that the smooth function gets multiplied by covariate z"

g<-gamm(NAR_Overall_Efficiency ~ s(age),data = behTable,random = list(bblid=~1))
r <- residuals(g$gam,type = "pearson")
behTable$Overall_Efficiency_resid <- r

g<-gamm(Putamen_h ~ s(age,k=4,fx=T),data = behTable,random = list(bblid=~1))
r <- residuals(g$gam,type = "pearson")
behTable$Putamen_h_resid <- r

# Loop over ROIs
for (r in "Putamen_h") {
  cat(sprintf("\n## Results for %s\n",r))
  # Set the cognitive variables to test
  if (r == "Putamen_h") {
    # Overall efficiency is significant for Putamen, so check the sub-domains of efficiency as well.
    cog_vars <- c("NAR_Overall_Efficiency",
                  "NAR_F1_Social_Cognition_Efficiency",
                  "NAR_F2_Complex_Reasoning_Efficiency",
                  "NAR_F3_Executive_Efficiency",
                  "NAR_F4_Memory_Efficiency")
  } else{
    cog_vars <- c("NAR_Overall_Efficiency")
  }
  # Loop over the cognitive variables
  for (cv in cog_vars) {
    cat(sprintf("\n### %s\n",cv))
    thisTable <- behTable
    
    # First look at a main effects only model
    # Fitting smooths for age and cognition
    add_formula <- sprintf("%s ~ timepoint +oSex + s(age, k=4, fx = T) + s(%s, k=4, fx = T)",r,cv,cv)
    gamm_model(thisTable,
               add_formula,
               this_label = sprintf("%s %s M.E.",r,cv),
               smooth_var = cv,
               group_var = "bblid",
               pbootstrap = T,
               model_test = F)
    
    # Compare the two interaction models
    cat('\n### Comparing interaction models...\n')
    # Bivariate interaction
    bv_formula <- sprintf(
      "%s ~ timepoint +oSex + ti(age, k=4, fx = T) + ti(%s, k=4, fx = T) + ti(age,%s, k=4, fx = T)",
      r,cv,cv,cv)
    # Linear varying coefficient interaction
    vc_formula <- sprintf(
      "%s ~ timepoint + oSex +s(age, k=4, fx = T) + s(age, by = %s, k=4, fx = T)",
      r,cv)

    bv <- gamm(as.formula(bv_formula),
               random = list(bblid=~1),
               data = thisTable)
    vc <- gamm(as.formula(vc_formula),
               random = list(bblid=~1),
               data = thisTable)
    bic<-BIC(bv$lme,vc$lme) # get BIC
    bestmod <- gsub(row.names(bic)[which.min(bic$BIC)],pattern = "$lme",replacement = "", fixed = T) #best is min BIC
    
    #confirm there are no concurvity issues if the VC model is the best.
    c<-as.data.frame(concurvity(vc$gam))
    if (c["observed",contains(vars = names(c),"s(age):")]>.75) {
      bestmod<-"bv"
    }
    switch (bestmod,
      "bv" = {model <- bv},
      "vc" = {model <- vc}
    )

    model_formula <- model$gam$formula
    cat("\n\nbest model is\n",deparse(model_formula))

    # Now check if the interaction is significant using parametric bootstrap `(pbootstrap=T)`
    gamm_model(thisTable,
               model_formula,
               this_label = sprintf("%s %s final model",r,cv),
               smooth_var = "age",
               int_var = cv,
               group_var = "bblid",
               pbootstrap = T,
               model_test = F)

  }
}

Results for Putamen_h

NAR_Overall_Efficiency

Results for Putamen_h NAR_Overall_Efficiency M.E.

convurvity
para s(age) s(NAR_Overall_Efficiency)
worst 0.9718732 0.4416904 0.4008970
observed 0.9718732 0.3401784 0.1834552
estimate 0.9718732 0.3428022 0.3461640
Regression table from gamm in Putamen_h NAR_Overall_Efficiency M.E., BIC = 3412.52, obs = 1067
term estimate std.error statistic p.value
(Intercept) 16.9640954541874 0.19107845249858 88.780787327726 0
timepoint.L 0.153197738963919 0.495492846126982 0.309182544533971 0.757243662505699
timepoint.Q 0.18618333682846 0.36560739398768 0.509243904500285 0.610687663005984
timepoint.C 0.0824575819978033 0.178512036347206 0.461916090842318 0.644236637665198
oSex.L -0.0593799783407271 0.056024270223721 -1.05989739988769 0.289433639425036
term edf ref.df statistic p.value
s(age) 3 3 57.9559837154165 1.099010468818e-34
s(NAR_Overall_Efficiency) 2.99999999999999 2.99999999999999 2.2066659969151 0.0856957516751655

Comparing interaction models…

best model is Putamen_h ~ timepoint + oSex + s(age, k = 4, fx = T) + s(age, by = NAR_Overall_Efficiency, k = 4, fx = T)

Results for Putamen_h NAR_Overall_Efficiency final model

convurvity
para s(age) s(age):NAR_Overall_Efficiency
worst 0.9728761 0.5454464 0.5366853
observed 0.9728761 0.4972131 0.4513417
estimate 0.9728761 0.4943173 0.4777880
Regression table from gamm in Putamen_h NAR_Overall_Efficiency final model, BIC = 3411.55, obs = 1067
term estimate std.error statistic p.value
(Intercept) 16.8917066037838 0.194799823394821 86.7131515286214 0
timepoint.L 0.0818205246651944 0.495215414576723 0.165222087715361 0.868800833489742
timepoint.Q 0.126614359711642 0.365095370827026 0.346798041905684 0.728812208092882
timepoint.C 0.0596995807305022 0.178127691984599 0.335150475848886 0.737578205452899
oSex.L -0.057236626890966 0.0557542527912067 -1.02658764175911 0.304849989212817
term edf ref.df statistic p.value
s(age) 2.99999999999998 2.99999999999998 35.5854308842152 4.96099444686161e-22
s(age):NAR_Overall_Efficiency 4.00000000000001 4 3.66102933706076 0.00571354546138863

### NAR_F1_Social_Cognition_Efficiency

Results for Putamen_h NAR_F1_Social_Cognition_Efficiency M.E.

convurvity
para s(age) s(NAR_F1_Social_Cognition_Efficiency)
worst 0.9720741 0.3332461 0.2927798
observed 0.9720741 0.2979737 0.1593748
estimate 0.9720741 0.2890377 0.2556842
Regression table from gamm in Putamen_h NAR_F1_Social_Cognition_Efficiency M.E., BIC = 3415.17, obs = 1067
term estimate std.error statistic p.value
(Intercept) 16.9499019148066 0.191607627263226 88.4615198095494 0
timepoint.L 0.115240583524021 0.496366156549519 0.232168494977809 0.816452130959866
timepoint.Q 0.156982429610969 0.365153910655281 0.429907567823274 0.667350590496855
timepoint.C 0.0704308386708812 0.178203471409482 0.395227085722942 0.692755126412335
oSex.L -0.063230721964161 0.056222407703891 -1.12465339971175 0.260991503852891
term edf ref.df statistic p.value
s(age) 2.99999999999999 2.99999999999999 63.6969285137938 1.1090151988442e-38
s(NAR_F1_Social_Cognition_Efficiency) 2.99999999999999 2.99999999999999 1.3180497327979 0.267056369543889

Comparing interaction models…

best model is Putamen_h ~ timepoint + oSex + s(age, k = 4, fx = T) + s(age, by = NAR_F1_Social_Cognition_Efficiency, k = 4, fx = T)

Results for Putamen_h NAR_F1_Social_Cognition_Efficiency final model

convurvity
para s(age) s(age):NAR_F1_Social_Cognition_Efficiency
worst 0.9731073 0.4663956 0.4687256
observed 0.9731073 0.4468037 0.4092586
estimate 0.9731073 0.4374676 0.3870572
Regression table from gamm in Putamen_h NAR_F1_Social_Cognition_Efficiency final model, BIC = 3415.50, obs = 1067
term estimate std.error statistic p.value
(Intercept) 16.8768764482188 0.196003708747567 86.1048831986876 0
timepoint.L 0.0161656448007594 0.498023394943542 0.0324596092570953 0.974111665897196
timepoint.Q 0.080606449502637 0.366362092161434 0.220018531467274 0.825899287244041
timepoint.C 0.0453156464360474 0.178435390823321 0.253961090493068 0.799575103760973
oSex.L -0.0657923499915266 0.0559581708619602 -1.17574161160174 0.239963397974342
term edf ref.df statistic p.value
s(age) 2.99999999999998 2.99999999999998 44.1798043442418 3.77479651083285e-27
s(age):NAR_F1_Social_Cognition_Efficiency 4.00000000000001 4 2.66249153748449 0.0313598320078323

### NAR_F2_Complex_Reasoning_Efficiency

Results for Putamen_h NAR_F2_Complex_Reasoning_Efficiency M.E.

convurvity
para s(age) s(NAR_F2_Complex_Reasoning_Efficiency)
worst 0.9718918 0.2945251 0.1786761
observed 0.9718918 0.2717048 0.1480075
estimate 0.9718918 0.2640843 0.1642319
Regression table from gamm in Putamen_h NAR_F2_Complex_Reasoning_Efficiency M.E., BIC = 3413.37, obs = 1067
term estimate std.error statistic p.value
(Intercept) 16.9665792391014 0.190883357298468 88.8845391197317 0
timepoint.L 0.150002463671327 0.495006199006654 0.30303148520633 0.761925551935103
timepoint.Q 0.15178802091587 0.364976616915221 0.415884234444336 0.677579216041555
timepoint.C 0.0517823773328458 0.178094360223439 0.290758097380957 0.77129342758053
oSex.L -0.0469108688629576 0.056271822565255 -0.833647582829894 0.40466812503491
term edf ref.df statistic p.value
s(age) 2.99999999999999 2.99999999999999 64.8065835883413 2.46444471819691e-39
s(NAR_F2_Complex_Reasoning_Efficiency) 2.99999999999999 2.99999999999999 1.91742380908008 0.124989603505959

Comparing interaction models…

best model is Putamen_h ~ timepoint + oSex + s(age, k = 4, fx = T) + s(age, by = NAR_F2_Complex_Reasoning_Efficiency, k = 4, fx = T)

Results for Putamen_h NAR_F2_Complex_Reasoning_Efficiency final model

convurvity
para s(age) s(age):NAR_F2_Complex_Reasoning_Efficiency
worst 0.9721947 0.3981731 0.3521673
observed 0.9721947 0.3528812 0.1629076
estimate 0.9721947 0.3502766 0.2384207
Regression table from gamm in Putamen_h NAR_F2_Complex_Reasoning_Efficiency final model, BIC = 3410.41, obs = 1067
term estimate std.error statistic p.value
(Intercept) 16.9686246388585 0.191313283298937 88.6954859916555 0
timepoint.L 0.208327284482123 0.492925661486224 0.422634284962958 0.672648231259325
timepoint.Q 0.204636000743215 0.36413299562187 0.561981482600156 0.574248006732008
timepoint.C 0.0815260287408567 0.177770836781702 0.45860181690526 0.646614587991182
oSex.L -0.0477755852283671 0.0560424609832607 -0.852489066149989 0.394136187862349
term edf ref.df statistic p.value
s(age) 2.99999999999999 2.99999999999999 53.4014304671236 1.30958815159932e-32
s(age):NAR_F2_Complex_Reasoning_Efficiency 4.00000000000002 4 3.93386711389326 0.00355165292088365

### NAR_F3_Executive_Efficiency

Results for Putamen_h NAR_F3_Executive_Efficiency M.E.

convurvity
para s(age) s(NAR_F3_Executive_Efficiency)
worst 0.971778 0.5079742 0.4507879
observed 0.971778 0.4035207 0.0441854
estimate 0.971778 0.3980152 0.3953887
Regression table from gamm in Putamen_h NAR_F3_Executive_Efficiency M.E., BIC = 3414.82, obs = 1067
term estimate std.error statistic p.value
(Intercept) 16.9993746580928 0.190890175000003 89.0531671317947 0
timepoint.L 0.23111547506227 0.495062818286763 0.466840704907064 0.640710150331751
timepoint.Q 0.176573583579899 0.365000365354903 0.483762758451515 0.62865454599205
timepoint.C 0.0809055194724956 0.178176125023125 0.454076097243641 0.649867365965702
oSex.L -0.0579282016099435 0.0561123005394339 -1.03236190733676 0.302138981896942
term edf ref.df statistic p.value
s(age) 2.99999999999998 2.99999999999998 56.5263711013206 1.86581701703375e-34
s(NAR_F3_Executive_Efficiency) 3 3 1.43328545448684 0.231489523013777

Comparing interaction models…

best model is Putamen_h ~ timepoint + oSex + s(age, k = 4, fx = T) + s(age, by = NAR_F3_Executive_Efficiency, k = 4, fx = T)

Results for Putamen_h NAR_F3_Executive_Efficiency final model

convurvity
para s(age) s(age):NAR_F3_Executive_Efficiency
worst 0.9724708 0.5988118 0.5944483
observed 0.9724708 0.5273855 0.3925526
estimate 0.9724708 0.5190668 0.5110341
Regression table from gamm in Putamen_h NAR_F3_Executive_Efficiency final model, BIC = 3415.52, obs = 1067
term estimate std.error statistic p.value
(Intercept) 16.9290632726131 0.19321701809188 87.6168333400262 0
timepoint.L 0.140778253634304 0.495295678771188 0.284230732607177 0.776289340268548
timepoint.Q 0.124833744857213 0.365042196094913 0.341970726104102 0.73244103063904
timepoint.C 0.0463001118016715 0.178173654529744 0.259859472063207 0.795022876437736
oSex.L -0.0484352577613098 0.0559378143223821 -0.865876837485402 0.386754568021397
term edf ref.df statistic p.value
s(age) 3.00000000000002 3 35.4710891391769 7.65669845853247e-22
s(age):NAR_F3_Executive_Efficiency 4.00000000000003 4 2.65369901398951 0.0318228783401142

### NAR_F4_Memory_Efficiency

Results for Putamen_h NAR_F4_Memory_Efficiency M.E.

convurvity
para s(age) s(NAR_F4_Memory_Efficiency)
worst 0.9717821 0.3104867 0.1632110
observed 0.9717821 0.2893542 0.0942233
estimate 0.9717821 0.2749873 0.1272475
Regression table from gamm in Putamen_h NAR_F4_Memory_Efficiency M.E., BIC = 3416.06, obs = 1067
term estimate std.error statistic p.value
(Intercept) 16.979921614825 0.190600610952492 89.0863965753883 0
timepoint.L 0.189595426522223 0.494290502052286 0.383570846971621 0.70137375975867
timepoint.Q 0.185295269664508 0.365232732315672 0.507334784836196 0.612025867086226
timepoint.C 0.0848171144290412 0.178593463950647 0.47491723690672 0.634944168066508
oSex.L -0.0564737547009858 0.0561878790386208 -1.00508785288316 0.315084795721971
term edf ref.df statistic p.value
s(age) 3 3 65.2396514315123 1.05751050405678e-38
s(NAR_F4_Memory_Efficiency) 2.99999999999999 2.99999999999999 1.02002372858875 0.382902265176225

Comparing interaction models…

best model is Putamen_h ~ timepoint + oSex + s(age, k = 4, fx = T) + s(age, by = NAR_F4_Memory_Efficiency, k = 4, fx = T)

Results for Putamen_h NAR_F4_Memory_Efficiency final model

convurvity
para s(age) s(age):NAR_F4_Memory_Efficiency
worst 0.9721994 0.4696706 0.4041636
observed 0.9721994 0.3688750 0.2577590
estimate 0.9721994 0.3606478 0.2595430
Regression table from gamm in Putamen_h NAR_F4_Memory_Efficiency final model, BIC = 3422.00, obs = 1067
term estimate std.error statistic p.value
(Intercept) 16.9692774138662 0.190926870512715 88.87841385708 0
timepoint.L 0.157915720377196 0.492180445582302 0.320849236889866 0.748388188357601
timepoint.Q 0.137889646476784 0.363224592722462 0.379626405368825 0.70429915851729
timepoint.C 0.0580375607786316 0.177290740625206 0.327358104399392 0.743461993685597
oSex.L -0.054846163255636 0.0562520255977639 -0.975007791680592 0.329779897724949
term edf ref.df statistic p.value
s(age) 2.99999999999999 2.99999999999999 57.4095808392361 5.64110049193901e-35
s(age):NAR_F4_Memory_Efficiency 4.00000000000001 4 1.02767376286287 0.391772212974838

cog_vars <- c("NAR_Overall_Efficiency")

for (r in rois) {
  cat(sprintf("\n## Results for %s\n",r))
  for (cv in cog_vars) {
    cat(sprintf("\n### %s\n",cv))
    thisTable <- dataTable

    ## The logic here is to compare a bivariate interaction that varies by sex to a nested model that does not vary by sex
    cat('\n### Comparing interaction models...\n')
    # Bivariate interaction that just includes sex as a covariate
    bv1_formula <- sprintf(
      "%s ~ timepoint + oSex + te(age,%s, k=4, fx = T)",
      r,cv,cv,cv)
    # Bivariate that varies as a function of sex
    bv2_formula <- sprintf(
      "%s ~ timepoint + oSex + te(age,%s, k=4, fx = T) + te(age,%s, by = oSex, k=4, fx = T)",
      r,cv,cv)
  
    bv1 <- gamm(as.formula(bv1_formula),
               random = list(bblid=~1),
               data = thisTable)
    bv2 <- gamm(as.formula(bv2_formula),
               random = list(bblid=~1),
               data = thisTable)
    bic<-BIC(bv1$lme,bv2$lme) # get BIC
    bestmod <- gsub(row.names(bic)[which.min(bic$BIC)],pattern = "$lme",replacement = "", fixed = T) #best is min BIC
    bestmod<-"bv2"
    switch (bestmod,
      "bv1" = {model <- bv1},
      "bv2" = {model <- bv2}
    )
    model_formula <- model$gam$formula
    cat(sprintf("\n\nbest model is %s\n",deparse(model_formula)))
    a <- anova(model$gam)
    print(kable(a$pTerms.table)%>%kable_styling(position = "left"))
    print(kable(a$s.table)%>%kable_styling(position = "left"))
    
    # Now check if the interaction term from the chosen model is significant
    gamm_model(thisTable,
               model_formula,
               this_label = sprintf("%s %s final model",r,cv),
               smooth_var = "age",
               int_var = cv,
               group_var = "bblid",
               pbootstrap = F,
               model_test = F)
  }
}